Purpose: Demonstrate model creation with the AWS boto3 SDK using the Iris multi-class dataset.
Steps: upload the data files to S3, create training/evaluation datasources, train a multi-class model, and evaluate it.
In [1]:
import boto3
import os
In [2]:
# Local Data Path
# Training, Eval Files
# Batch Evaluation Files
# Download Batch result to local drive
# Local Data Path
# Training, Eval Files
# Batch Evaluation Files
# Download Batch result to local drive
# Build the path with os.path.join instead of hard-coding Windows '\' separators,
# so the notebook also runs on macOS/Linux (same path on Windows, portable elsewhere).
data_path = os.path.join('..', '..', 'Data', 'ModelCreationDemo')
# Training and Test Files
training_file = 'iris_data_train.csv'
training_schema_file = 'iris_data_train.csv.schema'
batch_test_file = 'iris_data_classifier_test.csv'
In [3]:
# S3 Bucket name - bucket name needs to be globally unique across AWS (not just your account)
# Example (make sure you modify it to point to your s3 bucket)
# NOTE(review): 'ml-course' is a placeholder and is very likely already taken -
# replace with a bucket you own before running.
s3_bucket_name = 'ml-course'
s3_folder_name = 'boto3_demo'
# s3_path will be s3_bucket_name/s3_folder_name/
In [4]:
# Initialize Session with appropriate user profile.
# Optionally, use a different profile (profile_name='mluser') instead of default profile
# Specify region where you want to create your ML Models and files
# http://docs.aws.amazon.com/machine-learning/latest/dg/regions-and-endpoints.html
# currently ML is available in US East (N. Virginia) us-east-1 and EU (Ireland) eu-west-1
session = boto3.Session(region_name = 'us-east-1',
profile_name = 'ml_user')
# Low-level client for the Amazon Machine Learning API; every API call below goes through it.
ml_client = session.client('machinelearning' )
In [5]:
def upload_files_to_s3():
    """Copy the demo data files from the local data folder into the S3 bucket.

    Each file is written to s3://<s3_bucket_name>/<s3_folder_name>/<file name>;
    the local path of every file is printed as it is uploaded.
    """
    bucket = session.resource('s3').Bucket(s3_bucket_name)
    for name in (training_file, training_schema_file, batch_test_file):
        local_path = os.path.join(data_path, name)
        print(local_path)
        # upload files to s3 bucket.
        bucket.upload_file(local_path, s3_folder_name + '/' + name)
In [6]:
upload_files_to_s3()
In [7]:
# Derived from
# https://github.com/awslabs/machine-learning-samples/blob/master/social-media/create-aml-model.py
def create_data_source(dataset_name,
                       s3_data_uri,
                       s3_schema_uri,
                       ds_type, percent_begin,
                       percent_end,
                       compute_statistics):
    """Create an Amazon ML datasource from a sequential slice of an S3 CSV.

    A DataRearrangement 'splitting' directive selects the rows between
    percent_begin and percent_end of the file at s3_data_uri, with the column
    schema read from s3_schema_uri.  The datasource id is derived from ds_type,
    so re-running with the same ds_type reuses the same id.  Returns the raw
    CreateDataSourceFromS3 response dict.
    """
    source_id = "ds-boto3-iris-{0}".format(ds_type)
    # JSON splitting directive, e.g. {"splitting":{"percentBegin":0,"percentEnd":70,"strategy":"sequential"}}
    rearrangement = (
        '{{"splitting":{{"percentBegin":{0},"percentEnd":{1},'
        '"strategy":"sequential"}}}}'
    ).format(percent_begin, percent_end)
    spec = {
        'DataLocationS3': s3_data_uri,
        'DataSchemaLocationS3': s3_schema_uri,
        'DataRearrangement': rearrangement,
    }
    response = ml_client.create_data_source_from_s3(
        DataSourceId = source_id,
        DataSourceName = "{0}_[percentBegin={1}, percentEnd={2}]".format(
            dataset_name, percent_begin, percent_end),
        DataSpec = spec,
        ComputeStatistics = compute_statistics)
    print("Creating {0} datasource".format(ds_type))
    return response
In [8]:
# Create Training Data Source
# Full S3 URIs (s3://bucket/folder/file) for the training CSV and its schema.
s3_train_uri = "s3://" + "/".join([s3_bucket_name, s3_folder_name, training_file])
s3_train_schema_uri = "s3://" + "/".join([s3_bucket_name, s3_folder_name, training_schema_file])
In [9]:
s3_train_uri, s3_train_schema_uri
Out[9]:
In [10]:
# Datasource over the first 70% of the rows (sequential split), with
# ComputeStatistics=True - statistics are needed for the datasource a model trains on.
train_datasource = create_data_source(
'iris_training',
s3_train_uri,
s3_train_schema_uri,
'Training',0, 70, True)
In [11]:
# Datasource over the remaining 30% of the rows (70-100, sequential split),
# used only for evaluation, so statistics are not computed.
eval_datasource = create_data_source(
'iris_evaluation',
s3_train_uri,
s3_train_schema_uri,
'Evaluation', 70, 100, False)
In [12]:
# The generated datasource ids ('ds-boto3-iris-Training' / 'ds-boto3-iris-Evaluation').
print(train_datasource['DataSourceId'])
print(eval_datasource['DataSourceId'])
In [13]:
# Train a MULTICLASS model on the training datasource.
# NOTE(review): no Recipe or Parameters are passed, so Amazon ML applies its
# defaults - confirm that is intended for this demo.
model_create_response = ml_client.create_ml_model(
MLModelId = 'ml-iris-demo',
MLModelName = 'ML model: iris-demo-from-code',
MLModelType = 'MULTICLASS',
TrainingDataSourceId = train_datasource['DataSourceId'])
In [14]:
model_create_response
Out[14]:
In [15]:
# Query 'Status': 'PENDING'|'INPROGRESS'|'FAILED'|'COMPLETED'|'DELETED',
# Model building is asynchronous - re-run this cell until the status reaches 'COMPLETED'.
ml_client.get_ml_model(MLModelId = model_create_response['MLModelId'])['Status']
Out[15]:
In [16]:
# Evaluate the trained model against the held-out evaluation datasource (rows 70-100).
evaluation_response = ml_client.create_evaluation (
EvaluationId = 'eval-iris-demo',
EvaluationName = 'Eval ML model: iris-demo-from-code',
MLModelId = model_create_response['MLModelId'],
EvaluationDataSourceId = eval_datasource['DataSourceId'])
In [17]:
evaluation_response
Out[17]:
In [18]:
# Query 'Status': 'PENDING'|'INPROGRESS'|'FAILED'|'COMPLETED'|'DELETED'
# Evaluation also runs asynchronously; fetch the current state of the evaluation.
eval_result = ml_client.get_evaluation(EvaluationId = evaluation_response['EvaluationId'])
In [19]:
eval_result['Status']
Out[19]:
In [20]:
eval_result['PerformanceMetrics']
Out[20]:
In [21]:
eval_result
Out[21]: